# coding=utf8
import json
import re
import time
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

from libs.help import time_to_str, str_to_time
from models.search import model_search, model_search_log, model_aliexpress_order
from shop_item_statics import get_items

# Extracts the first number from scraped text (prices / order counts):
# a decimal like "12.5" or "0.99", or a plain integer.
re_pattern_num = re.compile(r'[1-9]\d*\.\d*|0\.\d*[1-9]\d*|[0-9]\d*')


def get_order_list_aliexpress(pid, product_id, page):
    """Fetch one page of AliExpress feedback records for *product_id* and
    store each previously-unseen order via ``model_aliexpress_order``.

    :param pid: search-log id (currently unused here, kept for signature
        parity with the other crawlers).
    :param product_id: AliExpress product id whose orders are crawled.
    :param page: 1-based feedback page number.
    :return: True when another page should be fetched, False when this was
        the last page (no records, hard 200-page cap, or total == current).
    """
    # Cache-busting "_" query parameter, milliseconds since epoch.
    surl = 'http://feedback.aliexpress.com/display/evaluationProductDetailAjaxService.htm?productId={product_id}&type=default&page={page}&_=' + str(
        int(time.time()) * 1000)
    url = surl.format(product_id=product_id, page=page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2767.4 Safari/537.36',
        "Cookie": "ali_apache_id=10.181.239.59.1466747292275.065255.2; acs_usuc_t=acs_rt=7ff5e76d4aca46aca5fb2469303fc5f3; xman_t=9+aeCT3KGjp2PHz4q/XzHYt9oXdRo4POs3S72Kcqofkwds2Vbsso5hJZOUeKsD6k; xman_f=bg2iIMRFxmZZ0bJRny+/H6ry/HR5LPxJ3SJGk8tws2Am/atFlpdU8XyWusIawW/YXw1ToDPJ9S3P8yjUpO1bYg7R/8wAZt7KDmq2wT3IlsANk+Fp2T0Vpg==; ali_beacon_id=10.181.239.59.1466747292275.065255.2; cna=G9fqDx4PZUQCATFQEtmvLj17; _mle_tmp0=eNrz4A12DQ729PeL9%2FV3cfUx8KvOTLFSMnUytjRxcjI3MzA2NnZyMbEwMXEzczYzMTJyM3I2tHRU0kkusTI0MTMzNzUwANImRjqJyWgCuRVWBrVRAJKhF6E%3D; _umdata=486B7B12C6AA95F22ACF9ADA55228D1D81BC72F7C4BA1DB25F3ABC84ACE0EDF27D593046B88E9BD8B2BFF8A6F0649AB3C96313245EBF8075523391BEA5F66987E1D4BE4BF1554537021DE4C807A8670982D229CE4F283C3EE9D47576BAB8EFC7; alievaluation_ab=49.80.18.35.1466747332210.5; JSESSIONID=1D63909E0E029B49BF1E49E66413151B; xman_us_f=x_l=1&x_locale=en_US; intl_locale=en_US; intl_common_forever=bd8uyIc/W0f9OCBjDWpStj74AJ49PiOc8EC7ndJ4IQUdhLzdfn3b2g==; aep_usuc_f=region=US&site=glo&b_locale=en_US&c_tp=USD; _ga=GA1.2.1878279729.1466749999; l=At7eaW2D/kursm/uQ76tfZu7rn4gEqMn; ali_apache_track=; ali_apache_tracktmp=",
        "Host": "feedback.aliexpress.com",
        'Pragma': 'no-cache',
        "Upgrade-Insecure-Requests": "1"
    }
    print(url)
    try:
        r = requests.get(url, headers=headers)
    except requests.RequestException:
        # Probably rate-limited / transient network error: back off once and retry.
        print("太快,重试一次")
        time.sleep(5)
        r = requests.get(url, headers=headers)
    try:
        # The endpoint emits JSON with raw backslashes; double them so
        # json.loads accepts the payload.
        re_data = json.loads(r.text.replace('\\', r'\\'))
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; skip the broken
        # page but keep crawling the next one.
        print("无法解析json,暂时抛弃")
        return True
    # Stop on an empty page or after a hard 200-page safety cap.
    if not re_data['records'] or page > 200:
        return False
    for item in re_data['records']:
        # Skip orders we have already stored.
        if model_aliexpress_order.find_one({'OrderId': item['id']}) is not None:
            continue
        timestamp = str_to_time(item['date'], format='%d %b %Y %H:%M')
        model_aliexpress_order.insert_one({
            'ProductId': product_id,
            'OrderId': item['id'],
            'CountryCode': item['countryCode'],
            'CountryName': item['countryName'],
            'Quantity': int(item['quantity']),
            'Price': float(item['price']),
            'Unit': item['unit'],
            'DateTimeStamp': timestamp,
            'DateTimeStr': time_to_str("%Y-%m-%d %H:%M", unixtime=timestamp)
        })
    if re_data['page']['total'] == re_data['page']['current']:
        return False
    return True


def search_aliexpress(pid, keyword, page=1):
    """Scrape one AliExpress search-result page for *keyword*.

    Inserts one ``model_search`` document per listed item and, for items
    with sales, crawls their order history via get_order_list_aliexpress.

    :param pid: search-log id stored with each item record.
    :param keyword: search keyword (URL-quoted here).
    :param page: 1-based result page.
    :return: (item_count, total_sold) for the page, or False when the page
        carries no result list.
    """
    # surl = 'http://www.aliexpress.com/wholesale?initiative_id=SB_20160623222354&site=glo&shipCountry=us&g=y&SearchText={keyword}&page={page}'
    surl = 'http://www.aliexpress.com/af/{keyword}{page}.html?site=glo&SearchText={keyword}&g=y&CatId=2&isAffiliate=y&shipCountry=ae&isrefine=y'
    # Page 1 has no path suffix; later pages use "/<n>".
    if page == 1:
        page = ''
    else:
        page = '/' + str(page)
    url = surl.format(keyword=quote(keyword), page=page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2767.4 Safari/537.36',
        "Cookie": "ali_apache_id=10.181.239.59.1466747292275.065255.2; acs_usuc_t=acs_rt=7ff5e76d4aca46aca5fb2469303fc5f3; xman_t=9+aeCT3KGjp2PHz4q/XzHYt9oXdRo4POs3S72Kcqofkwds2Vbsso5hJZOUeKsD6k; xman_f=bg2iIMRFxmZZ0bJRny+/H6ry/HR5LPxJ3SJGk8tws2Am/atFlpdU8XyWusIawW/YXw1ToDPJ9S3P8yjUpO1bYg7R/8wAZt7KDmq2wT3IlsANk+Fp2T0Vpg==; ali_beacon_id=10.181.239.59.1466747292275.065255.2; cna=G9fqDx4PZUQCATFQEtmvLj17; _mle_tmp0=eNrz4A12DQ729PeL9%2FV3cfUx8KvOTLFSMnUytjRxcjI3MzA2NnZyMbEwMXEzczYzMTJyM3I2tHRU0kkusTI0MTMzNzUwANImRjqJyWgCuRVWBrVRAJKhF6E%3D; _umdata=486B7B12C6AA95F22ACF9ADA55228D1D81BC72F7C4BA1DB25F3ABC84ACE0EDF27D593046B88E9BD8B2BFF8A6F0649AB3C96313245EBF8075523391BEA5F66987E1D4BE4BF1554537021DE4C807A8670982D229CE4F283C3EE9D47576BAB8EFC7; alievaluation_ab=49.80.18.35.1466747332210.5; JSESSIONID=1D63909E0E029B49BF1E49E66413151B; xman_us_f=x_l=1&x_locale=en_US; intl_locale=en_US; intl_common_forever=bd8uyIc/W0f9OCBjDWpStj74AJ49PiOc8EC7ndJ4IQUdhLzdfn3b2g==; aep_usuc_f=region=US&site=glo&b_locale=en_US&c_tp=USD; _ga=GA1.2.1878279729.1466749999; l=At7eaW2D/kursm/uQ76tfZu7rn4gEqMn; ali_apache_track=; ali_apache_tracktmp=",
        "Host": "www.aliexpress.com",
        "Upgrade-Insecure-Requests": "1"
    }
    print(url)
    r = requests.get(url, headers=headers)
    # Parse the HTML result list.
    soup = BeautifulSoup(r.text, 'lxml')
    items_list = soup.find(id='hs-list-items')
    if items_list is None:
        return False
    res_num = 0
    res_sold_out = 0
    for item in items_list.find_all('li'):
        product_id = item['qrdata'].split('|')[1]
        # Price is stored directly in USD.
        item_value_str = item.find(class_='info').find(class_='value')
        price = float(re_pattern_num.search(item_value_str.get_text(strip=True)).group(0))
        item_num_str = item.find(class_='order-num')
        if item_num_str is None:
            sold_out = 0
        else:
            sold_out = int(re_pattern_num.search(item_num_str.get_text(strip=True)).group(0))
        item_info = item.find(class_='pic').find('img')
        # Record the item.
        model_search.insert_one({
            'Pid': pid,
            'ProductId': product_id,
            'SoldOut': sold_out,
            'ItemName': item_info['alt'],
            'Price': price,  # already a float; the extra float() was redundant
            # todo: also record the currency unit here
            # Bug fix: `'src' in item_info` tested the Tag's *children*
            # (Tag.__contains__ checks .contents), not its attributes, so
            # Thumb was always ''.  Tag.get reads the attribute safely.
            'Thumb': item_info.get('src', '')
        })
        # Crude re-crawl guard (todo: imperfect, but avoids mass re-fetch):
        # only refetch order history when the listed sold count exceeds the
        # number of orders already stored.
        # NOTE(review): Cursor.count() is removed in modern pymongo — if the
        # driver is upgraded this should become count_documents(); confirm.
        sold_out_saved_num = model_aliexpress_order.find({'ProductId': product_id}).count()
        if sold_out > 0 and sold_out_saved_num < sold_out:
            if sold_out_saved_num > 0:
                print("删除旧的")
                model_aliexpress_order.delete_many({'ProductId': product_id})
            n = 0
            while True:
                n += 1
                if not get_order_list_aliexpress(pid, product_id, n):
                    break
                time.sleep(1.5)
        res_sold_out += sold_out
        res_num += 1
    return res_num, res_sold_out


def search_weidian(pid, keyword, page=1):
    """Query the Weidian search API for *keyword* and persist every item.

    The page number and the per-page limit can be tuned in the URL template.

    :param pid: search-log id stored with each item record.
    :param keyword: search keyword (URL-quoted here).
    :param page: 1-based result page.
    :return: (item_count, total_sold), or False when no items remain or the
        response is malformed.
    """
    surl = "https://api-z.weidian.com/service/search/items?keyword={keyword}&guid=1466479083407_2668048359360715&page={page}&limit=40"
    url = surl.format(keyword=quote(keyword), page=page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2767.4 Safari/537.36',
        "Cookie": "WD_guid=1466479083407_2668048359360715; WD_adsk=H5_SEARCH_1975396_CPC; WD_seller=2374158; is_follow_mp=1",
        "Host": "api-z.weidian.com",
        "Upgrade-Insecure-Requests": "1"
    }
    # print(url)
    r = requests.get(url, headers=headers)
    re_data = json.loads(r.text)
    try:
        if len(re_data['result']['items']) == 0:
            return False
    except (KeyError, TypeError):
        # Bug fix: the old bare `except` only printed the URL and then fell
        # through to the loop below, which crashed on the same missing key.
        print(url)
        return False
    res_num = 0
    res_sold_out = 0
    for item in re_data['result']['items']:
        model_search.insert_one({
            'Pid': pid,
            'SoldOut': item['soldout'],
            'ItemName': item['itemName'],
            'Price': item['price'],
            'Thumb': item['img']
            # 'DiscountPrice': item['discount_price']
        })
        res_sold_out += item['soldout']
        res_num += 1

    return res_num, res_sold_out


def search_taobao(pid, keyword, page=1):
    """Query the Taobao mobile search API for *keyword* and persist every item.

    Taobao caps results at 40 per page.

    :param pid: search-log id stored with each item record.
    :param keyword: search keyword (URL-quoted here).
    :param page: 1-based result page.
    :return: (item_count, total_sold), or False when no items remain or the
        response is malformed.
    """
    surl = "http://s.m.taobao.com/search?&q={keyword}&n=40&m=api4h5&page={page}"
    url = surl.format(keyword=quote(keyword), page=page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2767.4 Safari/537.36',
        "Cookie": "t=51156ac3eafc720b27ae96f13aa495fa; cna=G9fqDx4PZUQCATFQEtmvLj17; l=Avb2H4Hv83deoe7ujcHFLwG8xib4FzpR; JSESSIONID=6907E6CE272F794E40245017CB7F0DA1",
        "Host": "s.m.taobao.com",
        "Upgrade-Insecure-Requests": "1"
    }
    print(url)
    r = requests.get(url, headers=headers)
    re_data = json.loads(r.text)
    try:
        if len(re_data['listItem']) == 0:
            return False
    except (KeyError, TypeError):
        # Narrowed from a bare `except`: only a missing/null 'listItem'
        # means "no more results"; anything else should surface.
        print(url)
        print(r.text)
        return False
    res_num = 0
    res_sold_out = 0
    for item in re_data['listItem']:
        # The API returns counts as strings, sometimes empty.
        sold = int(item['sold']) if item['sold'] != '' else 0
        comment = int(item['commentCount']) if item['commentCount'] != '' else 0
        model_search.insert_one({
            'Pid': pid,
            'SoldOut': sold,
            'ItemName': item['name'],
            'Price': float(item['price']),
            'Thumb': item['img2'],
            'CommentCount': comment,
            'Url': item['url']
            # 'OriginalPrice': float(item['originalPrice'])
        })
        res_sold_out += sold
        res_num += 1

    return res_num, res_sold_out


'''
def search_jd(keyword, page=1):
    surl = "http://search.jd.com/s_new.php?keyword={keyword}&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&offset=2&page={page}&s=29&scrolling=y&pos=30"
    url = surl.format(keyword=quote(keyword), page=page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2767.4 Safari/537.36',
        "Cookie": "__jdv=238571484|www.jd.com|t_pcmtiaozhuan_pcmtiaozhuan|tuiguang|-; sid=faaa888eb95b34ec5e2c5094ea53b2db; __jda=122270672.353062453.1466513385.1466513385.1466513385.2; __jdb=122270672.5.353062453|2.1466513385; __jdc=122270672; mx=0_X; xtest=72327.57127.14ee22eaba297944c96afdbe5b16c65b.14ee22eaba297944c96afdbe5b16c65b; ipLoc-djd=1-72-2799-0; ipLocation=%u5317%u4EAC; __jdu=353062453",
        "Host": "search.jd.com",
        "Upgrade-Insecure-Requests": "1"
    }
    r = requests.get(url, headers=headers)
    # 狗洞的要自己解析html
    soup = BeautifulSoup(r.text, 'lxml')
    res = []
    res_price = 0
    res_sold_out = 0
    try:
        lis = soup.find_all('li')
    except:
        return False
    for li in lis:
        try:
            res_price += float(li.find(class_='p-price').find('i').get_text(strip=True))
            res_sold_out += int(li.find(class_='p-commit').find('a').get_text(strip=True))
        except:
            continue
    return res, res_price, res_sold_out
'''

# Dispatch table: source name -> crawler function.  Each function shares the
# signature (pid, keyword, page) and returns (num, sold_out) or False.
search_func_map = {
    'weidian': search_weidian,
    'taobao': search_taobao,
    'aliexpress': search_aliexpress
    # 'jingdong': search_jd
}


def statics_data(keyword, source):
    """Crawl every result page of *source* for *keyword*, aggregate the
    totals into the search log, and dump two CSV summaries under data/.

    :param keyword: search keyword passed to the crawler.
    :param source: key into search_func_map ('weidian'/'taobao'/'aliexpress').
    """
    page = 1
    # Aggregated statistics across all pages.
    num = 0
    sold_out = 0
    target = model_search_log.insert_one({
        'Source': source,
        'Keyword': keyword,
        'RunTimestamp': int(time.time()),
        'RunTimeStr': time_to_str('%Y-%m-%d %H:%M')
    })
    while True:
        print(page)
        page_item = search_func_map[source](target.inserted_id, keyword, page)
        if not page_item:
            break
        page_num, page_sold_out = page_item
        num += page_num
        sold_out += page_sold_out
        page += 1
        time.sleep(5)  # throttle between pages
    model_search_log.update_one({'_id': target.inserted_id}, {
        '$set': {
            'Num': num,
            'SoldOut': sold_out
        }
    })
    print(str(target.inserted_id))
    with open("data/{keyword}_{source}.csv".format(keyword=keyword, source=source), 'w', encoding='utf-8-sig') as f:
        f.write(
            str(num) + ',' + str(sold_out)
        )
    res = ['标题,价格,销量,comment']
    for item in get_items(target.inserted_id):
        # Commas would break the hand-rolled CSV; replace them in the title.
        name = item['ItemName'].replace(',', ' ')
        res.append(
            ','.join([
                # Bug fix: only taobao records include 'CommentCount'
                # (weidian/aliexpress inserts omit it), so indexing raised
                # KeyError for those sources; default to ''.
                name, str(item['Price']), str(item['SoldOut']), str(item.get('CommentCount', ''))
            ])
        )
    with open("data/{keyword}_items_{source}.csv".format(keyword=keyword, source=source), 'w',
              encoding='utf-8-sig') as f:
        f.write(
            '\n'.join(res)
        )
    time.sleep(10)


if __name__ == '__main__':
    # Run one crawl per (keyword, source) combination.
    keyword_list = ["前列腺"]
    source_list = ['weidian']
    for kw in keyword_list:
        for src in source_list:
            statics_data(kw, src)
